##############################################################################
# Filename: Robustness_Comprehension.R
# Purpose: Produce SI Figure A5 
##############################################################################

source("Setup.R")

# Check distribution of clicks on the article page 
  # clicks.distr = as.data.frame(table(data_eval$Clicks_Article_Page))
  # t$cumulative_percentage = 100*cumsum(clicks.distr$Freq)/sum(clicks.distr$Freq)
  # View(clicks.distr)

  # 20% took 33 clicks or less
  # 40% took 48 clicks or less
  # 60% took 64 clicks or less
  # 80% took 84 clicks or less

# Set up groups based on the percentiles noted above
data_eval$Clicks_Article_Page_group1 = ifelse(data_eval$Clicks_Article_Page < 34, 1, 0)
data_eval$Clicks_Article_Page_group2 = ifelse(data_eval$Clicks_Article_Page < 49, 1, 0)
data_eval$Clicks_Article_Page_group3 = ifelse(data_eval$Clicks_Article_Page < 65, 1, 0)
data_eval$Clicks_Article_Page_group4 = ifelse(data_eval$Clicks_Article_Page < 85, 1, 0)
data_eval$Clicks_Article_Page_group5 = ifelse(data_eval$Clicks_Article_Page <= max(data_eval$Clicks_Article_Page), 1, 0)

# Produce estimates 
depvars = c("YesCorrect","YesWellWritten","YesRecommend")
percentilelabels = seq(20,100,by=20)
results.all = data.frame()
for (j in 1:length(depvars)){
  coef = numeric()
  stderr = numeric()
  pvalue = numeric()
  for (i in 1:5){
    gr = paste("Clicks_Article_Page_group",i,sep="")
    data_eval$gr = data_eval[,which(names(data_eval)==gr)]
    data_eval$dv = data_eval[,which(names(data_eval)==depvars[j])]
    reg = lm(dv ~ ProBJP*Article + CollegeGrad + NewsDaily + StrongInterestPolitics, 
             data = subset(data_eval, gr == 1))
    coef[i] = summary(reg)$coefficients[7,1]
    stderr[i] = summary(reg)$coefficients[7,2]
    pvalue[i] = summary(reg)$coefficients[7,4]
  }
  res = as.data.frame(cbind(1:5,coef,stderr,pvalue))
  res$outvariable = depvars[j]
  results.all = rbind(results.all,res)
}
results.all$outvariable = substr(results.all$outvariable,4,nchar(results.all$outvariable))
results.all$outvariable = ifelse(results.all$outvariable=="WellWritten","Well-Written",results.all$outvariable)
results.all$outvariable = factor(results.all$outvariable, levels=c("Correct","Well-Written","Recommend"))
results.all$baselinecoefs = case_when(results.all$outvariable=="Correct" ~ 0.5457861,
                                  results.all$outvariable=="Well-Written" ~ 0.2298600,
                                  results.all$outvariable=="Recommend" ~ 0.3630208)

# Plot
ggplot(results.all, aes(x = coef, y = V1)) +
  facet_wrap(~ outvariable) +
  geom_point(size = 1) +
  geom_errorbar(aes(xmin  = coef - stderr,
                    xmax  = coef + stderr),
                    width = 0.2,
                    size  = 0.7) +
  scale_x_continuous(limits = c(0,0.8)) +  
  scale_y_continuous(limits = c(0.9,5.1), breaks = c(1,2,3,4,5), labels = percentilelabels) +  
  geom_vline(aes(xintercept = baselinecoefs), color = "red") +
  labs(x = expression(Coefficient~estimate~of~italic("Pro-BJP × Positive Article")), 
       y = "Percentile of the number of clicks") +
  theme_bw() + 
  theme(plot.title = element_text(family = "sans", size=13, hjust=0.5),
        axis.title = element_text(family = "sans", size=11),
        axis.text.x = element_text(family = "sans", size=10),
        plot.background = element_rect(fill="white",color="white"),
        panel.spacing = unit(1, "lines"))
# ggsave("clicksrobust_articlemcq.png", width = 7, height = 3)